# Score the "Economy" documents with quanteda's data_dictionary_LSD2015 and
# save the resulting data frame to "data_lsd_en.RDS".

# NOTE(review): clearing the global environment is kept so the script's side
# effects stay as before; it is unnecessary when run in a fresh R session.
rm(list = ls())
# library() stops with an error when quanteda is not installed, whereas
# require() only returns FALSE and lets the script fail later on.
library(quanteda)
quanteda_options(threads = 8)
source("functions.R")

# Load the tokens, drop tokens that consist solely of numbers and/or
# punctuation, and keep only documents whose `topic` docvar is "Economy".
toks <- readRDS("data_tokens_manual_en.RDS") %>%
    tokens_remove("^[\\p{N}\\p{P}]+$", valuetype = "regex") %>%
    tokens_subset(topic == "Economy")

# Map tokens to the dictionary keys they match, then count keys per document.
toks_lsd <- tokens_lookup(toks, data_dictionary_LSD2015)
dfmt_lsd <- dfm(toks_lsd)

# Put the document variables and the per-document key counts side by side.
dat <- docvars(dfmt_lsd)
dat <- cbind(dat, convert(dfmt_lsd, to = "data.frame"))
# Document length comes from the filtered tokens, not the dictionary matches.
dat$length <- ntoken(toks)

# Net sentiment (positive minus negative) per token; scale() with
# center = FALSE divides by the root mean square without centering.
net_per_token <- (dat$positive - dat$negative) / dat$length
dat$lsd <- as.numeric(scale(net_per_token, center = FALSE))
saveRDS(dat, "data_lsd_en.RDS")

